library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(leaps)
library(ggpmisc)
## Loading required package: ggpp
##
## Attaching package: 'ggpp'
##
## The following object is masked from 'package:ggplot2':
##
## annotate
library(ggthemes)
match_dir = 'data/matchups/'
rs_dir = 'data/upstreamRS/'
is_dir = 'data/in-situ/'
# Set seed for reproducibility
set.seed(799)
This script uses stepwise regression as an alternative modeling approach for the Lake Yojoa Secchi data.
match5 = read.csv(file.path(match_dir, 'fiveDay_LS-Secchi_matchups_n237.csv'))
match71 = read.csv(file.path(match_dir, 'multiDay_LS-Secchi_matchups_n238.csv'))
stack = read.csv(file.path(rs_dir, 'yojoa_corr_rrs_met_scaled_v2023-06-15.csv'))
secchi = read.csv(file.path(is_dir, 'Secchi_completedataset.csv')) %>%
mutate(secchi = as.numeric(secchi),
date = mdy(date)) %>%
filter(!is.na(secchi)) %>%
mutate(location = gsub(' ', '', location))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `secchi = as.numeric(secchi)`.
## Caused by warning:
## ! NAs introduced by coercion
prepData = function(df) {
#make a rowid column
df_prep = df %>%
rowid_to_column() %>%
mutate(secchi = as.numeric(secchi)) %>% #there's one wonky value in here with two decimal points... dropping from this analysis
filter(!is.na(secchi))
}
match5 = prepData(match5)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `secchi = as.numeric(secchi)`.
## Caused by warning:
## ! NAs introduced by coercion
match71 = prepData(match71)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `secchi = as.numeric(secchi)`.
## Caused by warning:
## ! NAs introduced by coercion
stack = stack %>%
filter(date < as.Date('2023-01-01'))
Make train/test/validation sets
70% train, 30% test. We use a cross-validation technique in the stepwise regression model that acts as a train/test (on the 70% data), and use the remaining 30% as a holdout for validation.
##Pull 30% as holdout test data
test_match5 <- match5 %>%
sample_frac(0.3)
## Remove holdout data
train_match5 <- match5 %>% filter(!rowid %in% test_match5$rowid)
test_match71 <- match71 %>%
sample_frac(0.3)
train_match71 <- match71 %>% filter(!rowid %in% test_match71$rowid)
Point to the features for model and subset data for those vars. We’ll use scaled variables in this method since linear-based models are susceptible to differing variable ranges.
vars_3m = c('secchi',
'med_Blue_corr_scaled', 'med_Red_corr_scaled', 'med_Green_corr_scaled',
'med_Nir_corr_scaled',
'RN_scaled', 'BG_scaled', 'RB_scaled', 'GB_scaled',
"tot_sol_rad_KJpm2_3_scaled", "max_temp_degK_3_scaled", "mean_temp_degK_3_scaled", "min_temp_degK_3_scaled",
"tot_precip_m_3_scaled", "mean_wind_mps_3_scaled")
vars_5m = c('secchi',
'med_Blue_corr_scaled', 'med_Red_corr_scaled', 'med_Green_corr_scaled',
'med_Nir_corr_scaled',
'RN_scaled', 'BG_scaled', 'RB_scaled', 'GB_scaled',
"tot_sol_rad_KJpm2_5_scaled", "max_temp_degK_5_scaled", "mean_temp_degK_5_scaled", "min_temp_degK_5_scaled",
"tot_precip_m_5_scaled", "mean_wind_mps_5_scaled")
vars_51m = c('secchi',
'med_Blue_corr_scaled', 'med_Red_corr_scaled', 'med_Green_corr_scaled',
'med_Nir_corr_scaled',
'RN_scaled', 'BG_scaled', 'RB_scaled', 'GB_scaled',
"tot_sol_rad_KJpm2_5_scaled", "max_temp_degK_5_scaled", "mean_temp_degK_5_scaled", "min_temp_degK_5_scaled",
"tot_precip_m_5_scaled", "mean_wind_mps_5_scaled",
"precip_m_prev_scaled", "air_temp_degK_prev_scaled", "wind_speed_mps_prev_scaled")
vars_71m = c('secchi',
'med_Blue_corr_scaled', 'med_Red_corr_scaled', 'med_Green_corr_scaled',
'med_Nir_corr_scaled',
'RN_scaled', 'BG_scaled', 'RB_scaled', 'GB_scaled',
"tot_sol_rad_KJpm2_7_scaled", "max_temp_degK_7_scaled", "mean_temp_degK_7_scaled", "min_temp_degK_7_scaled",
"tot_precip_m_7_scaled", "mean_wind_mps_7_scaled",
"precip_m_prev_scaled", "air_temp_degK_prev_scaled", "wind_speed_mps_prev_scaled")
#subset for only vars for modeling
train_match_5d_3m = train_match5 %>% select(all_of(vars_3m))
train_match_5d_5m = train_match5 %>% select(all_of(vars_5m))
train_match_5d_51m = train_match5 %>% select(all_of(vars_51m))
train_match_5d_71m = train_match5 %>% select(all_of(vars_71m))
train_match_71d_3m = train_match71 %>% select(all_of(vars_3m))
train_match_71d_5m = train_match71 %>% select(all_of(vars_5m))
train_match_71d_51m = train_match71 %>% select(all_of(vars_51m))
train_match_71d_71m = train_match71 %>% select(all_of(vars_71m))
And run the model
# Set up repeated k-fold cross-validation
train.control <- trainControl(method = "cv", number = 5)
# Train the models
step.model.5d3m <- train(secchi ~ ., data = train_match_5d_3m,
method = "leapBackward",
tuneGrid = data.frame(nvmax = 1:13),
trControl = train.control
)
save(step.model.5d3m, file = 'data/models/stepreg_5d_3m.RData')
step.model.5d5m <- train(secchi ~ ., data = train_match_5d_5m,
method = "leapBackward",
tuneGrid = data.frame(nvmax = 1:13),
trControl = train.control
)
save(step.model.5d5m, file = 'data/models/stepreg_5d_5m.RData')
step.model.5d51m <- train(secchi ~ ., data = train_match_5d_51m,
method = "leapBackward",
tuneGrid = data.frame(nvmax = 1:13),
trControl = train.control
)
save(step.model.5d51m, file = 'data/models/stepreg_5d_51m.RData')
step.model.5d71m <- train(secchi ~ ., data = train_match_5d_71m,
method = "leapBackward",
tuneGrid = data.frame(nvmax = 1:13),
trControl = train.control
)
save(step.model.5d71m, file = 'data/models/stepreg_5d_71m.RData')
step.model.71d3m <- train(secchi ~ ., data = train_match_71d_3m,
method = "leapBackward",
tuneGrid = data.frame(nvmax = 1:13),
trControl = train.control
)
save(step.model.71d3m, file = 'data/models/stepreg_71d_3m.RData')
step.model.71d5m <- train(secchi ~ ., data = train_match_71d_5m,
method = "leapBackward",
tuneGrid = data.frame(nvmax = 1:13),
trControl = train.control
)
save(step.model.71d5m, file = 'data/models/stepreg_71d_5m.RData')
step.model.71d51m <- train(secchi ~ ., data = train_match_71d_51m,
method = "leapBackward",
tuneGrid = data.frame(nvmax = 1:13),
trControl = train.control
)
save(step.model.71d51m, file = 'data/models/stepreg_71d_51m.RData')
step.model.71d71m <- train(secchi ~ ., data = train_match_71d_71m,
method = "leapBackward",
tuneGrid = data.frame(nvmax = 1:13),
trControl = train.control
)
save(step.model.71d71m, file = 'data/models/stepreg_71d_71m.RData')
Load models and see results
load('data/models/stepreg_5d_3m.RData')
load('data/models/stepreg_5d_5m.RData')
load('data/models/stepreg_5d_51m.RData')
load('data/models/stepreg_5d_71m.RData')
load('data/models/stepreg_71d_3m.RData')
load('data/models/stepreg_71d_5m.RData')
load('data/models/stepreg_71d_51m.RData')
load('data/models/stepreg_71d_71m.RData')
step.model.5d3m$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.1032429 0.1874457 0.8567915 0.2183780 0.06479765 0.10177316
## 2 2 1.0485653 0.2986517 0.8114409 0.1998904 0.13405273 0.08362951
## 3 3 1.0206205 0.3322865 0.7886529 0.1939597 0.09346274 0.07893003
## 4 4 1.0358397 0.3242732 0.8038943 0.2174186 0.10114298 0.10469880
## 5 5 1.0504992 0.3315036 0.8086616 0.2297186 0.13194620 0.11943692
## 6 6 1.0228337 0.3614978 0.7940941 0.2319763 0.10697535 0.11902551
## 7 7 1.0196162 0.3551042 0.7842735 0.2122180 0.08413331 0.10406244
## 8 8 1.0213774 0.3514618 0.7797615 0.2062623 0.08429905 0.09732226
## 9 9 1.0228340 0.3373657 0.7812834 0.1851233 0.08154461 0.08579817
## 10 10 1.0117032 0.3549801 0.7733825 0.1972396 0.08957721 0.09275924
## 11 11 0.9933357 0.3496779 0.7613964 0.1944121 0.09847722 0.09166609
## 12 12 1.0012619 0.3430719 0.7652865 0.1976063 0.10348569 0.09149264
## 13 13 1.0020544 0.3414635 0.7658871 0.1968439 0.10707619 0.09063836
step.model.5d5m$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.078093 0.2178674 0.8358408 0.2030182 0.09136651 0.09581566
## 2 2 1.027931 0.2898345 0.7921101 0.1867123 0.13122820 0.07086063
## 3 3 1.000071 0.3259312 0.7742615 0.2019342 0.11818093 0.07693482
## 4 4 1.023175 0.3023978 0.7892372 0.2256260 0.15233117 0.09777986
## 5 5 1.015743 0.3179326 0.7744445 0.2172694 0.13974988 0.07919821
## 6 6 1.007473 0.3369642 0.7680075 0.2257451 0.12759115 0.09918441
## 7 7 0.987233 0.3581665 0.7512786 0.2081486 0.12357281 0.08090477
## 8 8 0.982642 0.3632435 0.7472672 0.2029871 0.13951669 0.07158389
## 9 9 1.022765 0.3341714 0.7872648 0.2550456 0.14329289 0.11982150
## 10 10 1.023099 0.3350399 0.7809363 0.2431332 0.13082754 0.10166337
## 11 11 1.117110 0.3290074 0.8067514 0.4747506 0.15483967 0.18379705
## 12 12 1.146137 0.3219203 0.8152426 0.5308223 0.15313582 0.19665491
## 13 13 1.142150 0.3269681 0.8124143 0.5327264 0.15931239 0.20182069
step.model.5d51m$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.098130 0.2001358 0.8564772 0.2240700 0.09748508 0.1175524
## 2 2 1.165918 0.2010287 0.8508541 0.3301525 0.19541699 0.1363124
## 3 3 1.157802 0.1966317 0.8394217 0.3284846 0.18522681 0.1425394
## 4 4 1.143813 0.2004710 0.8243993 0.3009594 0.16513879 0.1287444
## 5 5 1.144620 0.2276949 0.8184678 0.3279853 0.18895536 0.1437608
## 6 6 1.132649 0.2502046 0.7948051 0.3424874 0.19385942 0.1558922
## 7 7 1.130324 0.2581214 0.7889403 0.3400633 0.18658872 0.1609453
## 8 8 1.092875 0.2764862 0.7678240 0.2993261 0.18020859 0.1341454
## 9 9 1.120435 0.2638391 0.7931030 0.3187314 0.18516256 0.1298837
## 10 10 1.118777 0.2666543 0.7978180 0.3224811 0.18305769 0.1351415
## 11 11 1.077584 0.2765307 0.7870469 0.2746664 0.17667937 0.1249493
## 12 12 1.066821 0.2869644 0.7776470 0.2612771 0.17157170 0.1116038
## 13 13 1.070846 0.2902548 0.7757357 0.2693807 0.17853401 0.1104554
step.model.5d71m$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.0695674 0.2342970 0.8452847 0.2569159 0.10570319 0.1644237
## 2 2 1.0624214 0.2455655 0.8286370 0.2642213 0.12259926 0.1714089
## 3 3 1.0350036 0.2898215 0.8069600 0.2496393 0.09160682 0.1624269
## 4 4 0.9686113 0.3838162 0.7392303 0.2726899 0.15747527 0.1899473
## 5 5 0.9837319 0.3766095 0.7386043 0.2591234 0.11872113 0.1679212
## 6 6 0.9733945 0.3944662 0.7185621 0.2500583 0.10725991 0.1676269
## 7 7 0.9803366 0.3888802 0.7221282 0.2219803 0.08046479 0.1462190
## 8 8 0.9858721 0.3823211 0.7400189 0.2147284 0.07053869 0.1418318
## 9 9 0.9957340 0.3810698 0.7377929 0.2256028 0.07062463 0.1519047
## 10 10 0.9846874 0.3957098 0.7295401 0.2241338 0.06813085 0.1503491
## 11 11 0.9550835 0.4017814 0.7117512 0.2286637 0.06137793 0.1489515
## 12 12 0.9627398 0.3950876 0.7240554 0.2369832 0.06650663 0.1452533
## 13 13 0.9610340 0.4019067 0.7207223 0.2421013 0.07518415 0.1539182
step.model.71d3m$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.192603 0.1225233 0.9258837 0.09979096 0.06828576 0.09096172
## 2 2 1.115690 0.2430032 0.8888688 0.13023438 0.08576233 0.09365044
## 3 3 1.041291 0.3563797 0.8173175 0.11326419 0.11009822 0.05116437
## 4 4 1.014414 0.3810868 0.7733222 0.12454259 0.11248354 0.04940530
## 5 5 1.005052 0.3993918 0.7600376 0.14668829 0.10560445 0.07023516
## 6 6 2.651578 0.3003428 1.0719805 3.59892870 0.16324343 0.64728575
## 7 7 2.806543 0.3164407 1.0948258 3.98118148 0.17170617 0.72728900
## 8 8 2.754671 0.3082456 1.0985753 3.82943802 0.16850025 0.70036873
## 9 9 2.707095 0.3092064 1.0868441 3.73028549 0.16748628 0.68300173
## 10 10 2.333519 0.3082367 1.0279190 2.89776127 0.16967418 0.54898737
## 11 11 2.438657 0.3057392 1.0498621 3.12578851 0.16730928 0.59204891
## 12 12 2.567097 0.3059320 1.0698729 3.41162305 0.16734000 0.64034788
## 13 13 2.562623 0.3076680 1.0690713 3.40704316 0.16838840 0.64063705
step.model.71d5m$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.1251832 0.2426648 0.8475081 0.2142379 0.13569577 0.10312184
## 2 2 1.0776116 0.3043094 0.8278432 0.2140637 0.11163697 0.11409165
## 3 3 1.0760143 0.3203419 0.8233468 0.2089656 0.08505699 0.07427513
## 4 4 1.0690399 0.3206150 0.8218974 0.2354632 0.09809733 0.08696628
## 5 5 1.0501757 0.3468879 0.7892654 0.2484399 0.12823780 0.10782380
## 6 6 1.0210822 0.3852714 0.7683352 0.2792016 0.16251762 0.13563065
## 7 7 1.0008376 0.4033404 0.7387540 0.2663732 0.14585957 0.13024869
## 8 8 0.9946182 0.4061086 0.7287433 0.2520350 0.14461043 0.13860157
## 9 9 0.9861469 0.4206873 0.7224389 0.2597831 0.15075089 0.14229940
## 10 10 0.9874899 0.4205008 0.7218382 0.2505282 0.14352931 0.14275457
## 11 11 0.9823203 0.4270443 0.7169985 0.2647974 0.16618677 0.16196952
## 12 12 0.9715327 0.4367353 0.7085334 0.2528655 0.15056115 0.15082393
## 13 13 0.9679659 0.4409189 0.7058185 0.2524640 0.14829467 0.14547780
step.model.71d51m$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.1302551 0.1993811 0.8840451 0.1896416 0.1545375 0.1010885
## 2 2 1.0821479 0.2660692 0.8459889 0.2005610 0.1459179 0.1045592
## 3 3 1.0265304 0.3284076 0.7981428 0.2127217 0.1070717 0.1055142
## 4 4 1.0455835 0.3151316 0.7969163 0.2336830 0.1397578 0.1092294
## 5 5 1.0124257 0.3556200 0.7608918 0.2304737 0.1177881 0.1250256
## 6 6 0.9991578 0.3762028 0.7442270 0.2246262 0.1143921 0.1245582
## 7 7 2.3646114 0.2981296 0.9955784 3.0059356 0.1861710 0.5532804
## 8 8 2.3605930 0.2956994 0.9872093 2.9782896 0.1789477 0.5189619
## 9 9 2.6009939 0.3117629 1.0149333 3.5520334 0.1856398 0.6249171
## 10 10 2.4407310 0.3165134 0.9871925 3.1948968 0.1882962 0.5591564
## 11 11 2.3934878 0.3193906 0.9711862 3.0901549 0.1902987 0.5417548
## 12 12 2.8621498 0.3190336 1.0526271 4.1290820 0.1899131 0.7207520
## 13 13 2.8111894 0.3246575 1.0441583 4.0255669 0.1925242 0.7045308
step.model.71d71m$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.146251 0.2081395 0.9096689 0.1751852 0.09553488 0.06728845
## 2 2 1.102032 0.2857876 0.8760276 0.1497883 0.06806347 0.10471462
## 3 3 1.050107 0.3411082 0.8197449 0.1594427 0.07964223 0.10587404
## 4 4 1.027723 0.3626925 0.7883613 0.1638086 0.09030170 0.11432758
## 5 5 2.897828 0.3310281 1.1326607 4.2698630 0.19021110 0.86579850
## 6 6 2.849418 0.3326742 1.0947252 4.1773360 0.18591769 0.83330749
## 7 7 3.051351 0.3262050 1.1338348 4.6249701 0.18044436 0.92368337
## 8 8 2.805137 0.3252127 1.0889903 4.0630959 0.18123621 0.82894826
## 9 9 2.896957 0.3249259 1.1017772 4.2771230 0.18373927 0.87152708
## 10 10 3.450692 0.3324622 1.1922777 5.5351348 0.18970899 1.08970068
## 11 11 3.579677 0.3334315 1.2162048 5.8285238 0.19056009 1.14625474
## 12 12 3.644449 0.3232747 1.2293283 5.9520608 0.19146524 1.15871534
## 13 13 3.535719 0.3204230 1.2094119 5.6984549 0.19074283 1.10898995
Looks like the only model to really dig in with is the 5-day window and the 7/1 day window with the 5 day met. Let’s look at the hold out data to see what they look like before we go any further. First, the 7/1 day window:
test_match71$predicted = predict.train(step.model.71d5m, test_match71)
ggplot(test_match71, aes(x = secchi, y = predicted)) +
geom_point() +
geom_abline(color = 'grey', lty = 2) +
# coord_cartesian(xlim = c(0, 6.5),
# ylim = c(0,6.5)) +
stat_poly_eq(aes(label = paste(after_stat(adj.rr.label))),
formula = y~x,
parse = TRUE,
label.y = Inf,
vjust = 1.3) +
labs(title = 'Yojoa Secchi Stepwise Regression Test Data\n7/1 day matchups, band and 5-day met summaries',
subtitle = 'moderate data stringency\ngrey dashed line is 1:1',
x = 'Actual Secchi (m)',
y = 'Predicted Secchi (m)') +
theme_bw()+
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
test_match71 %>%
filter(predicted>0) %>%
ggplot(., aes(x = secchi, y = predicted)) +
geom_point() +
geom_abline(color = 'grey', lty = 2) +
coord_cartesian(xlim = c(0, 6.5),
ylim = c(0,6.5)) +
stat_poly_eq(aes(label = paste(after_stat(adj.rr.label))),
formula = y~x,
parse = TRUE,
label.y = Inf,
vjust = 1.3) +
labs(title = 'Yojoa Secchi Stepwise Regression Test Data\n7/1 day matchups, band and 5-day met summaries',
subtitle = 'moderate data stringency\ngrey dashed line is 1:1',
x = 'Actual Secchi (m)',
y = 'Predicted Secchi (m)') +
theme_bw()+
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
And look at the 5-day window
test_match5$predicted = predict.train(step.model.5d5m, test_match5)
ggplot(test_match5, aes(x = secchi, y = predicted)) +
geom_point() +
geom_abline(color = 'grey', lty = 2) +
# coord_cartesian(xlim = c(0, 6.5),
# ylim = c(0,6.5)) +
stat_poly_eq(aes(label = paste(after_stat(adj.rr.label))),
formula = y~x,
parse = TRUE,
label.y = Inf,
vjust = 1.3) +
labs(title = 'Yojoa Secchi Stepwise Regression Test Data\nfive day matchups, band and 5-day met summaries',
subtitle = 'moderate data stringency\ngrey dashed line is 1:1',
x = 'Actual Secchi (m)',
y = 'Predicted Secchi (m)') +
theme_bw()+
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
test_match5 %>%
filter(predicted > 0) %>%
ggplot(., aes(x = secchi, y = predicted)) +
geom_point() +
geom_abline(color = 'grey', lty = 2) +
coord_cartesian(xlim = c(0, 6.5),
ylim = c(0,6.5)) +
stat_poly_eq(aes(label = paste(after_stat(adj.rr.label))),
formula = y~x,
parse = TRUE,
label.y = Inf,
vjust = 1.3) +
labs(title = 'Yojoa Secchi Stepwise Regression Test Data\nfive day matchups, band and 5-day met summaries',
subtitle = 'moderate data stringency\ngrey dashed line is 1:1',
x = 'Actual Secchi (m)',
y = 'Predicted Secchi (m)') +
theme_bw()+
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
Okay, well those negative values suck, so let’s take a peek at the timeseries for the 7/1 window, since the 5 day has egregious Secchi estimates for some out-of-range features.
stack = stack %>%
mutate(secchi = predict.train(object = step.model.71d5m, newdata = stack)) %>%
mutate(date = ymd(date))
secchi = secchi %>%
mutate(mission = 'Measured')
situ_stack <- full_join(stack, secchi) %>%
select(date, location, secchi, mission)
## Joining with `by = join_by(date, location, mission, secchi)`
plotRecordBySite = function(site) {
ggplot(situ_stack %>%
filter(location == site), aes(x = date, y = secchi, color = mission,
shape = mission)) +
geom_point() +
labs(title = paste0('Yojoa Secchi Historical Record - Site ', site),
subtitle = '7/1 day window, 5 day met\nmoderate data stringency\nstepwise regression',
y = 'Secchi (m)',
color = 'data source', shape = 'data source') +
scale_color_manual(values = c('grey10','grey30','grey50','grey70','blue')) +
theme_few() +
theme(legend.position = c(0.8,0.8)) +
scale_shape_manual(values = c(19,19,19,19,1)) +
scale_y_continuous(limits = c(0, max(situ_stack$secchi)), breaks = seq(0, max(situ_stack$secchi), 2)) +
scale_x_date(limits = c(min(situ_stack$date), max(situ_stack$date))) +
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
}
map(sort(unique(situ_stack$location)), plotRecordBySite)
## [[1]]
##
## [[2]]
##
## [[3]]
## Warning: Removed 1 rows containing missing values (`geom_point()`).
##
## [[4]]
##
## [[5]]
## Warning: Removed 1 rows containing missing values (`geom_point()`).
##
## [[6]]
##
## [[7]]
##
## [[8]]
## Warning: Removed 1 rows containing missing values (`geom_point()`).
##
## [[9]]
## Warning: Removed 1 rows containing missing values (`geom_point()`).
##
## [[10]]
##
## [[11]]
##
## [[12]]
## Warning: Removed 1 rows containing missing values (`geom_point()`).
##
## [[13]]
##
## [[14]]
## Warning: Removed 1 rows containing missing values (`geom_point()`).
##
## [[15]]
## Warning: Removed 1 rows containing missing values (`geom_point()`).
##
## [[16]]
##
## [[17]]
## Warning: Removed 2 rows containing missing values (`geom_point()`).
##
## [[18]]
plotRecentBySite = function(site) {
ggplot(situ_stack %>%
filter(location == site), aes(x = date, y = secchi, color = mission,
shape = mission)) +
geom_point() +
labs(title = paste0('Yojoa Secchi 2018-2022 - Site ', site),
subtitle = '7/1 day window, 5 day met\nmoderate data stringency\nstepwise regression',
y = 'Secchi (m)',
color = 'data source', shape = 'data source') +
scale_color_manual(values = c('grey10','grey30','grey50','grey70','blue')) +
theme_few() +
theme(legend.position = c(0.8,0.8)) +
scale_shape_manual(values = c(19,19,19,19,1)) +
scale_y_continuous(limits = c(0, max(situ_stack$secchi)), breaks = seq(0, max(situ_stack$secchi), 2)) +
scale_x_date(limits = c(ymd('2018-01-01'), max(situ_stack$date))) +
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
}
map(sort(unique(situ_stack$location)), plotRecentBySite)
## [[1]]
## Warning: Removed 197 rows containing missing values (`geom_point()`).
##
## [[2]]
## Warning: Removed 277 rows containing missing values (`geom_point()`).
##
## [[3]]
## Warning: Removed 259 rows containing missing values (`geom_point()`).
##
## [[4]]
## Warning: Removed 235 rows containing missing values (`geom_point()`).
##
## [[5]]
## Warning: Removed 374 rows containing missing values (`geom_point()`).
##
## [[6]]
## Warning: Removed 281 rows containing missing values (`geom_point()`).
##
## [[7]]
## Warning: Removed 248 rows containing missing values (`geom_point()`).
##
## [[8]]
## Warning: Removed 243 rows containing missing values (`geom_point()`).
##
## [[9]]
## Warning: Removed 265 rows containing missing values (`geom_point()`).
##
## [[10]]
## Warning: Removed 203 rows containing missing values (`geom_point()`).
##
## [[11]]
## Warning: Removed 260 rows containing missing values (`geom_point()`).
##
## [[12]]
## Warning: Removed 234 rows containing missing values (`geom_point()`).
##
## [[13]]
## Warning: Removed 270 rows containing missing values (`geom_point()`).
##
## [[14]]
## Warning: Removed 166 rows containing missing values (`geom_point()`).
##
## [[15]]
## Warning: Removed 210 rows containing missing values (`geom_point()`).
##
## [[16]]
## Warning: Removed 259 rows containing missing values (`geom_point()`).
##
## [[17]]
## Warning: Removed 233 rows containing missing values (`geom_point()`).
##
## [[18]]
## Warning: Removed 252 rows containing missing values (`geom_point()`).
While there is plenty of variability across the lake, let’s summarize to a single value per date, since not all sites have the same density of record. Since there are a few oddballs in here (both in terms of measured and estimated), we’ll use the median Secchi across all sites.
lake_avg <- situ_stack %>%
group_by(date,mission) %>%
summarize(across(where(is.numeric),median))
## `summarise()` has grouped output by 'date'. You can override using the
## `.groups` argument.
ggplot(lake_avg, aes(x = date, y = secchi, color = mission, shape = mission)) +
geom_point() +
scale_color_manual(values = c('grey10','grey30','grey50','grey70','blue')) +
labs(title = 'Yojoa Secchi Historical Record\nwhole-lake average',
subtitle = '7/1 day window, 5 day met\nmoderate data stringency\nstepwise regression',
y = 'median Secchi (m)',
color = 'data source', shape = 'data source') +
theme_few() +
theme(legend.position = c(0.8,0.8)) +
scale_shape_manual(values = c(19,19,19,19,1)) +
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
ggplot(lake_avg, aes(x = date, y = secchi, color = mission,shape = mission)) +
geom_point() +
scale_color_manual(values = c('grey10','grey30','grey50','grey70','blue')) +
theme_few() +
labs(title = 'Yojoa Secchi 2006\nwhole-lake average',
subtitle = '7/1 day window, 5 day met\nmoderate data stringency\nstepwise regression',
y = 'median Secchi (m)',
color = 'data source', shape = 'data source') +
theme(legend.position = c(0.8,0.8)) +
scale_shape_manual(values = c(19,19,19,19,1)) +
scale_x_date(limits = c(as.Date('2006-01-01'), as.Date('2006-12-31')))+
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
## Warning: Removed 674 rows containing missing values (`geom_point()`).
ggplot(lake_avg, aes(x = date, y = secchi, color = mission, shape = mission)) +
geom_point() +
scale_color_manual(values = c('grey10','grey30','grey50','grey70','blue')) +
theme_few() +
labs(title = 'Yojoa Secchi 2018-2022\nwhole-lake average',
subtitle = '7/1 day window, 5 day met\nmoderate data stringency\nstepwise regression',
y = 'median Secchi (m)',
color = 'data source', shape = 'data source') +
theme(legend.position = c(0.8,0.8)) +
scale_shape_manual(values = c(19,19,19,19,1)) +
scale_x_date(limits = c(as.Date('2018-01-01'), as.Date('2023-01-01')))+
theme(plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5))
## Warning: Removed 487 rows containing missing values (`geom_point()`).
varImp(step.model.71d5m)
## loess r-squared variable importance
##
## Overall
## max_temp_degK_5_scaled 100.000
## min_temp_degK_5_scaled 92.253
## med_Green_corr_scaled 89.920
## med_Red_corr_scaled 88.635
## BG_scaled 87.829
## mean_temp_degK_5_scaled 82.834
## RB_scaled 82.750
## RN_scaled 66.972
## tot_sol_rad_KJpm2_5_scaled 58.520
## med_Blue_corr_scaled 31.853
## med_Nir_corr_scaled 31.360
## tot_precip_m_5_scaled 17.149
## mean_wind_mps_5_scaled 8.816
## GB_scaled 0.000